# Run through 06-14 product module level RData and create product x channel code ranks by revenue 
# JHL 

### PRELIMINARIES 
# Packages this script depends on; any that are not yet installed are
# installed from CRAN before loading.
list.of.packages <- c("folderfun", "data.table", "bit64", "lubridate", "foreign")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if (length(new.packages) > 0) install.packages(new.packages,repos = "http://cran.us.r-project.org")

# library() stops with an error when a package cannot be loaded; require()
# would only warn and return FALSE, letting the script run on and fail later
# with an unrelated message.
library(folderfun)
library(data.table)
library(bit64)
library(lubridate)
library(foreign)

# Index of this array task, read from the SLURM environment. Expected range is
# 1-100: the file list is later split into 100 slices and indexed with myID
# and myID + 1.
myID <- as.numeric(Sys.getenv("SLURM_ARRAY_TASK_ID"))
if (is.na(myID) || myID < 1 || myID > 100) {
	# Fail here with a clear message rather than later with an obscure
	# subscript error when the slice boundaries are indexed by myID.
	stop("SLURM_ARRAY_TASK_ID must be set to a number between 1 and 100", call. = FALSE)
}

# 06-14
# Set paths
# Repository contains RMS data from 06-14 merged to module level in RData format as "move"
# setff() is expected to create the ff<name>() accessor functions called below
# (folderfun) -- NOTE(review): the underlying folder locations are configured
# outside this script.
setff("rms_move")
repository <- ffrms_move()
setff("path_home")
path_home <- ffpath_home()

do_path <- file.path(path_home, "do")                          # cached file listing lives here
dta_path <- file.path(path_home, "raw", "nielsen")             # stores_0615.dta is read from here
save_path <- file.path(path_home, "dta", "nielsen", "pranks")  # per-module rank output

# Create RData data table of file names and modules.
# The listing is cached in do_path so that it is built once and then simply
# loaded on subsequent runs. Columns: file_name (full path), id (position in
# the size-sorted listing), file (base name), module (digits of the base name).
cache_file <- sprintf("%s/module_0614_files.RData",do_path)

if (!file.exists(cache_file)) {

	ptm <- proc.time()

	# Much more efficient listing method using system call.
	# List files in repository by descending order of size (ls -S), so that id
	# follows file size. The working directory is restored afterwards so the
	# rest of the script is not silently run from inside the repository.
	old_wd <- setwd(repository)
	listing <- system("ls *.RData -S", intern = TRUE)
	setwd(old_wd)

	if (length(listing) == 0) {
		stop(sprintf("No .RData files found in %s", repository), call. = FALSE)
	}

	file.table <- data.table(file_name = paste0(repository, "/", listing))
	file.table[, id := .I]

	# Sample: "/project2/databases/nielsen/nielsen_extracts/RMS/Movement_Files_Combined_RFormatted/1040.RData"
	# Keep the last path component, then extract only digits: the module
	file.table[, file := basename(file_name)]
	file.table[, module := as.numeric(gsub("[^0-9]","",file))]

	# Save to RData for quick loading. Several array tasks can reach this point
	# at the same time, so write to a private temporary file first and rename it
	# into place; other tasks then never load a half-written cache.
	tmp_file <- tempfile(pattern = "module_0614_files_", tmpdir = do_path, fileext = ".RData")
	save(file.table, file = tmp_file)
	if (!file.rename(tmp_file, cache_file)) {
		stop(sprintf("Could not move %s to %s", tmp_file, cache_file), call. = FALSE)
	}

	print(proc.time() - ptm) # Time spent building the listing

}

load(cache_file)

### 1. Create product x channel code ranks by revenue ###
# Split the file ids into 100 slices at the percentiles of id and keep only the
# slice belonging to this array task (myID).
# Slices are half-open, (lower, upper], except for the first one which also
# includes its lower bound. With both bounds inclusive, an id that falls exactly
# on a percentile break would be handed to two adjacent tasks, which would then
# process the same module and write the same output file.
quant <- quantile(unique(file.table$id), probs = seq(0, 1, by = 0.01))
slice_lower <- quant[[myID]]
slice_upper <- quant[[myID + 1]]
if (myID == 1) {
	file.table <- file.table[id >= slice_lower & id <= slice_upper, ]
} else {
	file.table <- file.table[id > slice_lower & id <= slice_upper, ]
}

# Store x year channel codes. The file does not depend on the module, so it is
# read and keyed once here instead of on every loop iteration.
stores_0615 <- data.table(read.dta(sprintf("%s/stores_0615.dta",dta_path)))
setkey(stores_0615,store_code_uc,year)
store_channels <- stores_0615[,.(store_code_uc,year,channel_code)]

# For every module assigned to this task: load its movement data, attach the
# channel code of each store-year, total revenue by product x channel code,
# rank products within channel code, and write the result to
# <save_path>/<module>_prank.csv.
for (m in file.table[,module]){

	out_file <- sprintf('%s/%s_prank.csv',save_path,m)

	# Skip modules that already have output. The check must look for the file
	# that is actually written below (.csv); checking for .rds never matched,
	# so finished modules were always recomputed.
	if (file.exists(out_file)) {
		print(sprintf('%s Skip',m))
		next
	}

	# Loads the data table "move" for this module into the workspace
	load(file.table[module==m,file_name])

	# Merge with channel code; nomatch = 0 drops store-years that appear in
	# only one of the two tables
	setkey(move,store_code_uc,year)
	move <- move[store_channels, nomatch = 0]

	# Create revenue
	# NOTE(review): prmult is presumably the price multiple (price quoted for
	# prmult units) -- confirm against the RMS documentation
	move[,revenue:=units*price/prmult]

	# Collapse by product x channel code revenue, rank
	setkey(move,upc,upc_ver_uc,channel_code) # Set key as barcode x channel code
	prank <- move[, list(total_revenue = sum(revenue)), by = key(move)]
	rm(move)
	# rank 1 = highest total revenue within the channel code; ties share the
	# average rank (default of rank())
	prank[,rank:=rank(-total_revenue),by=channel_code]
	prank <- prank[order(channel_code,rank)]
	prank[,module:=m]

	# saveRDS(prank,sprintf('%s/%s_prank.rds',save_path,m))
	# Write to a temporary name and rename once complete, so that an
	# interrupted job cannot leave a partial CSV that the skip check above
	# would later treat as finished.
	tmp_file <- paste0(out_file, ".tmp")
	write.csv(prank, file = tmp_file)
	if (!file.rename(tmp_file, out_file)) {
		stop(sprintf("Could not move %s to %s", tmp_file, out_file), call. = FALSE)
	}
	rm(prank)
	print(sprintf('%s Done',m))

	gc() # Release the memory held by this module before loading the next
}


